#delimit;
/* Session setup: every command from here on is terminated by a semicolon. */
set more off;
clear;

/* Memory request. Releases of Stata that manage memory automatically ignore it. */
/* NOTE(review): no version statement pins the Stata release - confirm the target release. */
set mem 1500m;

/* Directory that receives the log files (the stored value ends with a slash). */
global pathlog  ~/research/pollution/logfiles/;

/* Close any log left open by an earlier run, then start the main log. */
capture log close;
log using ${pathlog}/table.log, replace;

/*================================================
 Program: tables.do
 Author:  Avi Ebenstein
 Created: June 2010
 Purpose: Tables in the paper.
=================================================*/

/* Load the analysis data. The option that discards data in memory is clear. */
use ~/research/pollution/datafiles/dsp_basins, clear;

*******************************************;
* Table 1 - Death rates by cause/urban/sex ;
*******************************************;

/* Attach a value label to dummy whose text for code 1 is a single blank. */
/* Presumably this keeps the row stub of the one-row tables empty - confirm. */
label define dummy 1 " ",add;
label values dummy dummy;

/* Switch logging to a file dedicated to this table. */
capture log close;
log using ~/research/pollution/logfiles/table1.log, replace;

/* Male means by urban, weighted by the rounded male population. */
table dummy urban [w=round(malepop,1)],
    c(mean deathrate_m mean cancer_m mean maledr9 mean maledr10 mean heart_m)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);
table dummy urban [w=round(malepop,1)],
    c(mean stroke_m mean respiratory_m mean violent_m mean other_m)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);

/* Female means by urban, weighted by the rounded female population. */
table dummy urban [w=round(femalepop,1)],
    c(mean deathrate_f mean cancer_f mean femaledr9 mean femaledr10 mean heart_f)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);
table dummy urban [w=round(femalepop,1)],
    c(mean stroke_f mean respiratory_f mean violent_f mean other_f)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);

/* Means of the _090, _091, _095 and _101 death-rate variables, males then females. */
table dummy urban [w=round(malepop,1)],
    c(mean maledr_090 mean maledr_091 mean maledr_095 mean maledr_101)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);
table dummy urban [w=round(femalepop,1)],
    c(mean femaledr_090 mean femaledr_091 mean femaledr_095 mean femaledr_101)
    stubwidth(8) cellwidth(9) csepwidth(1) f(%9.2fc);

log close;

**********************************************************;
* Table 2 - Sample means stratified by clean/dirty rivers ;
**********************************************************;

/* Reload the basin data from the same directory used by the other tables in this file. */
/* NOTE(review): the path previously omitted the research directory - confirm the location. */
use ~/research/pollution/datafiles/dsp_basins,clear;
/* North south with the western rivers recoded as the exception */
replace rivercat=2 if rnumber==4;
table rsystem rivercat, c(mean overall_q);

/* Numeric recode of the string categories in cla3: rural4 is 1 through rural1 is 4. */
/* Any other value of cla3 leaves ruralinc missing. */
gen ruralinc=.;
replace ruralinc=1 if cla3=="rural4";
replace ruralinc=2 if cla3=="rural3";
replace ruralinc=3 if cla3=="rural2";
replace ruralinc=4 if cla3=="rural1";

/* Number of observations in each rivercat group. */
bysort rivercat: gen systemtot=_N;
format frmup_flow %9.0fc;

*tabstat deathrate9 overall_q a_n bod dissolved_ lead mercury oils permanga_n volatile_p yrsed farmer urban production airpollution rainfall frmup_flow share_tap_water1 systemtot [w=totalpop], by(rivercat) f(%9.3fc) columns(statistics) nototal;

global varlist "deathrate9 overall_q a_n bod dissolved_ lead mercury oils permanga_n volatile_p yrsed farmer urban production airpollution rainfall frmup_flow share_tap_water1 ruralinc";

/* Two-group t test of each variable across rivercat. The t statistic, standard error */
/* and p-value are stored as constant variables so the tables below can report them. */
/* NOTE(review): the t test is unweighted while the tables are frequency-weighted - confirm intended. */
foreach var of global varlist{;
    qui ttest `var',by(rivercat);
    gen ttest_`var'=r(t);
    gen se_`var'=r(se);
    gen p_`var'=r(p);
};

/* One table per variable: weighted mean and sd, plus the stored standard error and p-value. */
foreach var of global varlist{;
    table dummy rivercat [fw=round(totalpop,1)] , c(mean `var' sd `var' mean se_`var' mean p_`var');
};

************************************************;
* Table 3 - OLS regressions - see olsregs.do    ;
* Table 4 - column 1/2 - see sexregs_revised.do ;
* Table 4 - column 3/4 - see tapregs.do         ;
************************************************;

************************************************;
* Table 5 - Smoking Rates/Diet by Water Quality ;
************************************************;

/* Load the smoking data. The option that discards data in memory is clear. */
/* NOTE(review): the path previously omitted the research directory used elsewhere in this file - confirm the location. */
use ~/research/pollution/datafiles/smoking_data, clear;

/* Water quality rounded to the nearest integer, and the combined male plus female population. */
gen quality_level=round(overall_q);
gen totpop=malepop+femalepop;

/* Population-weighted smoking rates by rounded water quality. */
table quality_level [w=round(totpop,1)], c(mean msmoker mean fsmoker);
tabstat msmoker fsmoker [w=round(totpop,1)],          by(quality_level) stats(mean sd semean);

/* One indicator variable per quality level. */
tab quality_level,gen(qualitydum);

/* For male then female smoking: level-specific means with an equality test across */
/* levels, followed by a linear regression on the quality level. */
/* NOTE(review): the test names six indicators, so it assumes exactly six quality levels - confirm. */
foreach smoker in msmoker fsmoker{;
    reg `smoker' qualitydum*,nocons cluster(province) robust;
    test qualitydum1=qualitydum2=qualitydum3=qualitydum4=qualitydum5=qualitydum6;
    reg `smoker' quality_level, cluster(province) robust;
};

/* Load the diet data from the project data directory used elsewhere in this file. */
/* NOTE(review): the path previously omitted the research directory - confirm the location. */
use ~/research/pollution/datafiles/diet_data, clear;

/* Water quality rounded to the nearest integer, and the combined male plus female population. */
gen quality_level=round(overall_q);
gen totpop=malepop+femalepop;

/* Population-weighted dietary intake by rounded water quality. */
table quality_level [w=round(totpop,1)], c(mean kcal mean carbo mean fat mean protein);
tabstat kcal carbo fat protein [w=round(totpop,1)], by(quality_level) stats(mean sd semean);

/* One indicator variable per quality level. */
tab quality_level,gen(qualitydum);

/* For each intake measure: level-specific means with an equality test across levels, */
/* followed by a linear regression on the quality level. */
/* NOTE(review): the first regression is weighted by totpop and the second is not - confirm intended. */
/* NOTE(review): the test names six indicators, so it assumes exactly six quality levels - confirm. */
global mylist "kcal carbo fat protein";
foreach food of global mylist{;
    reg `food' qualitydum* [w=totpop],nocons cluster(province) robust;
    test qualitydum1=qualitydum2=qualitydum3=qualitydum4=qualitydum5=qualitydum6;
    reg `food' quality_level, cluster(province) robust;
};
**************************************************;
* Table 6 - other causes regs - regs_cd.do        ;
* Table 7 - reduced form - see regs_cd_rf.do      ;
* Table 8 - IV regressions - see tributaryregs.do ;
* Table 9 - Levy regressions - see levyregs.do    ;
**************************************************;

************************************************;
* Table 1A - sample means by DSP classification ;
************************************************;

/* Reload the basin data for the appendix table. */
use ~/research/pollution/datafiles/dsp_basins, clear;

/* Unweighted mean and sd of each variable within urbcat, without the overall row. */
tabstat home hospital farmer yrsed totaldeaths totalpop stomach lung,
    by(urbcat) statistics(mean sd) nototal;

/* Observation counts per urbcat. */
tabulate urbcat;

/* Total deaths and total population per urbcat. */
table dummy urbcat, contents(sum totaldeaths sum totalpop) format(%9.0f);

************************************************;
* Table 2A - sample means by river system       ;
************************************************;

/* Load the water monitoring data. */
use ~/research/pollution/datafiles/waterpoints_data, clear;

/* Water-quality measures summarised in this table. */
global mylist "overall_q a_n bod dissolved_oxygen lead mercury oils permanganate volatile_phenol";

/* Recode zero readings to missing so they drop out of the statistics below. */
foreach measure of global mylist{;
    replace `measure'=. if `measure'==0;
};

/* Summary of each measure by rcode, using the default tabstat statistic. */
tabstat $mylist, by(rcode) format(%9.3fc);


************************************;
* Table 4A - Comparison with the US ;
************************************;

/* Reload the basin data and report overall means with no urban split. */
use ~/research/pollution/datafiles/dsp_basins, clear;

/* Male means, weighted by the rounded male population. */
table dummy [w=round(malepop,1)],
    contents(mean deathrate_m mean cancer_m mean maledr9 mean maledr10 mean heart_m)
    stubwidth(8) cellwidth(9) csepwidth(1) format(%9.2fc);

/* Female means, weighted by the rounded female population. */
table dummy  [w=round(femalepop,1)],
    contents(mean deathrate_f mean cancer_f mean femaledr9 mean femaledr10 mean heart_f)
    stubwidth(8) cellwidth(9) csepwidth(1) format(%9.2fc);

